In [ ]:
%run "../Functions/4. User comparison.ipynb"

Prepare data

Sample selection


In [ ]:
# Work on private copies of the selected sample tables so that the frames
# they come from are never modified by this notebook.
gfdf, rmdf = (
    sampleFrame.copy()
    for sampleFrame in (
        gfdfPlaytestPhase1PretestPosttestUniqueProfilesVolunteers,
        rmdfPlaytestPhase1PretestPosttestUniqueProfilesVolunteers,
    )
)

# The index of gfdf is deliberately left untouched: renumbering it
# (gfdf.index = range(0, len(gfdf))) interferes with getCorrections,
# which is gform.index-based.

Question renaming


In [ ]:
# Rename columns of the Google Forms table with tags, independently of the form language.
# Each pair below associates a question (as held by the Q* variables) with the
# tag used as column name in the rest of the notebook.
columnTagsDictionary = dict([
    (QTimestamp, 'timestamp'),
    (QCuriosityBiology, 'QCuriosityBiology'),
    (QCuriositySyntheticBiology, 'QCuriositySyntheticBiology'),
    (QCuriosityVideoGames, 'QCuriosityVideoGames'),
    (QCuriosityEngineering, 'QCuriosityEngineering'),
    (QPlayed, 'previousPlay'),
    (QAge, 'age'),
    (QGender, 'gender'),
    (QInterestVideoGames, 'gameInterest'),
    (QInterestBiology, 'biologyInterest'),
    (QStudiedBiology, 'biologyStudy'),
    (QPlayVideoGames, 'gameFrequency'),
    (QHeardSynBioOrBioBricks, 'synthBioKnowledge'),
    (QVolunteer, 'QVolunteer'),
    (QEnjoyed, 'QEnjoyed'),
    (QGenotypePhenotype, 'QGenotypePhenotype'),
    (QBioBricksDevicesComposition, 'QBioBricksDevicesComposition'),
    (QAmpicillin, 'QAmpicillin'),
    (QBBNamePlasmid, 'QBBNamePlasmid'),
    (QBBFunctionTER, 'QBBFunctionTER'),
    (QBBNamePromoter, 'QBBNamePromoter'),
    (QBBFunctionGameCDS, 'QBBFunctionGameCDS'),
    (QBBNameTerminator, 'QBBNameTerminator'),
    (QBBFunctionBiologyCDS, 'QBBFunctionBiologyCDS'),
    (QBBNameRBS, 'QBBNameRBS'),
    (QBBExampleCDS, 'QBBExampleCDS'),
    (QBBNameCDS, 'QBBNameCDS'),
    (QBBFunctionPR, 'QBBFunctionPR'),
    (QBBFunctionRBS, 'QBBFunctionRBS'),
    (QBBFunctionPlasmid, 'QBBFunctionPlasmid'),
    (QBBNameOperator, 'QBBNameOperator'),
    (QDeviceRbsPconsFlhdcTer, 'QDeviceRbsPconsFlhdcTer'),
    (QDevicePconsRbsFlhdcTer, 'QDevicePconsRbsFlhdcTer'),
    (QDevicePbadRbsGfpTer, 'QDevicePbadRbsGfpTer'),
    (QDevicePbadGfpRbsTer, 'QDevicePbadGfpRbsTer'),
    (QDeviceGfpRbsPconsTer, 'QDeviceGfpRbsPconsTer'),
    (QDevicePconsGfpRbsTer, 'QDevicePconsGfpRbsTer'),
    (QDeviceAmprRbsPconsTer, 'QDeviceAmprRbsPconsTer'),
    (QDeviceRbsPconsAmprTer, 'QDeviceRbsPconsAmprTer'),
    (QGreenFluorescence, 'QGreenFluorescence'),
    (QUnequipDevice, 'QUnequipDevice'),
    (QDevicePbadRbsAraTer, 'QDevicePbadRbsAraTer'),
    (QRemarks, 'comments'),
    (QUserId, 'anonymousID'),
    (QLanguage, 'lang'),
    (QTemporality, 'temporality'),
])

# Original column labels of the form table, kept before any renaming
columnQuestions = gfdf.columns.tolist()
# Same table as gfdf, with the tags as column names
googleData = gfdf.rename(columns=columnTagsDictionary)

In [ ]:
# Replace the answers to scientific questions by their correction, stored as an
# integer (whatever getCorrections reports for the answer, cast with int()).
correctedData = googleData.copy()

# Translate the scientific questions into their column tags. Entries that are
# already tags are kept as they are, so that running this cell a second time
# does not raise a KeyError; any other unknown entry still raises one.
knownColumnTags = set(columnTagsDictionary.values())
scientificQuestions = pd.Index([
    question if question in knownColumnTags else columnTagsDictionary[question]
    for question in scientificQuestions
])

for rowId in correctedData.index:
    # Get the correction for each subject
    playerId = correctedData.loc[rowId, "anonymousID"]
    correction = getCorrections(playerId, gfdf).rename(index=columnTagsDictionary)
    if correction.shape[1] > 0:
        # The subject has answered the questionnaire: replace the scientific
        # answers of this row by the values of its "corrections<rowId>" column.
        correctionColumn = "corrections" + str(rowId)
        for question in scientificQuestions:
            correctedData.loc[rowId, question] = int(correction.loc[question, correctionColumn])

# No column renaming is needed here: correctedData is a copy of googleData,
# whose columns already carry the tags.

For association rule mining


In [ ]:
# Keep only the columns holding the answers to scientific questions
correctedScientific = correctedData[scientificQuestions]

In [ ]:
# Preview the first rows only instead of rendering the whole table
correctedScientific.head()

For clustering

With full answers


In [ ]:
# Leave the timestamp and comments features out of the data to be coded
codedData = googleData.drop(columns=['timestamp', "comments"])
# Show the distinct raw answers given to the game frequency question
codedData["gameFrequency"].unique()

In [ ]:
# Code answers with integers when possible

# Define equivalences
# Five-level scale shared by the curiosity and interest questions, in English
# and in French; "I don't know" is coded like the middle level.
likertCoding = {"A lot": 4, "Beaucoup": 4, "Enormément": 5, "Énormément": 5, "Extremely": 5, "Moderately": 3, "Moyennement": 3, "Slightly": 2, "Un peu": 2, "I don't know": 3, "Je ne sais pas": 3, "Not at all": 1, "Pas du tout": 1}
# Each question gets its own copy so that the dictionaries stay independent.
# QCuriosityBiology
QCuriosityBiologyCoding = dict(likertCoding)
# QCuriositySyntheticBiology
QCuriositySyntheticBiologyCoding = dict(likertCoding)
# QCuriosityEngineering
QCuriosityEngineeringCoding = dict(likertCoding)
# QCuriosityVideoGames
QCuriosityVideoGamesCoding = dict(likertCoding)
# gameInterest
gameInterestCoding = dict(likertCoding)
# biologyStudy
biologyStudyCoding = {"Not even in middle school": 0, "Jamais": 0, "Jamais, pas même au collège": 0, "Until the end of middle school": 1, "Jusqu'au brevet": 1, "Until the end of high school": 2, "Jusqu'au bac": 2, "Until bachelor's degree": 3, "Jusqu'à la license": 3, "At least until master's degree": 4, "Au moins jusqu'au master": 4, "I don't know": 0, "Je ne sais pas": 0}
# biologyInterest
biologyInterestCoding = dict(likertCoding)
# gameFrequency (same levels, except that level 2 is worded "Rarely" in English)
gameFrequencyCoding = {"A lot": 4, "Beaucoup": 4, "Enormément": 5, "Énormément": 5, "Extremely": 5, "Moderately": 3, "Moyennement": 3, "Rarely": 2, "Un peu": 2, "I don't know": 3, "Je ne sais pas": 3, "Not at all": 1, "Pas du tout": 1}
# synthBioKnowledge
synthBioKnowledgeCoding = {"Yes, and I know what it means" : 2, "Yes, but I don't exactly know what it means": 1, "No": 0}
# previousPlay
previousPlayCoding = {"I played it multiple times recently": 3, "I played it multiple times recently on this computer": 3, "I played recently on an other computer": 2, "I played it some time ago": 1, "I just played for the first time": 1, "No / not yet": 0, "I don't know": 0}
# lang
languageCoding = {"en": 0, "fr": 1}
# temporality
temporalityCoding = {"pretest": 0, "posttest": 1, "undefined": -5}
# QVolunteer
QVolunteerCoding = {"Yes": 1, "No": 0}
# QEnjoyed
QEnjoyedCoding = {'Extremely': 4, 'A lot': 3, 'Not at all': 0, 'A bit': 1, 'Moderately': 2, "No": 0, "Not applicable: not played yet": -1}

# Fill NaN cells with a default answer per question.
# The defaults are applied through a single DataFrame.fillna call: calling
# fillna(..., inplace=True) on a column selected from the frame is a chained
# in-place assignment, which pandas deprecates and which leaves the frame
# unchanged once Copy-on-Write is enabled.
defaultAnswers = {
    "QCuriosityBiology": "Moderately",
    "QCuriositySyntheticBiology": "Moderately",
    "QCuriosityEngineering": "Moderately",
    "biologyInterest": "Moderately",
    "QCuriosityVideoGames": "Moderately",
    "gameInterest": "Moderately",
    "gameFrequency": "Moderately",
    "synthBioKnowledge": "No",
    "previousPlay": "No / not yet",
    "QVolunteer": "No",
    "QEnjoyed": "No",
    "biologyStudy": "I don't know",
}
codedData = codedData.fillna(value=defaultAnswers)
codedData = codedData.dropna(how='any') # Drop those who have not answered the pretest

# Replace by code, one whole column at a time instead of cell by cell.
# Answers are looked up with __getitem__ so that an answer without a code
# still raises a KeyError instead of silently becoming NaN.
answerCodings = {
    "QCuriosityBiology": QCuriosityBiologyCoding,
    "QCuriositySyntheticBiology": QCuriositySyntheticBiologyCoding,
    "QCuriosityEngineering": QCuriosityEngineeringCoding,
    "QCuriosityVideoGames": QCuriosityVideoGamesCoding,
    "gameInterest": gameInterestCoding,
    "biologyStudy": biologyStudyCoding,
    "biologyInterest": biologyInterestCoding,
    "gameFrequency": gameFrequencyCoding,
    "synthBioKnowledge": synthBioKnowledgeCoding,
    "previousPlay": previousPlayCoding,
    "lang": languageCoding,
    "temporality": temporalityCoding,
    "QVolunteer": QVolunteerCoding,
    "QEnjoyed": QEnjoyedCoding,
}
for columnTag, coding in answerCodings.items():
    codedData[columnTag] = codedData[columnTag].map(coding.__getitem__)

In [ ]:
# One-Hot version: gender and every scientific question are expanded into one
# indicator column per distinct answer, each prefixed with the name of the
# column it comes from.
oneHotColumns = [
    "gender",
    "QGenotypePhenotype",
    "QBioBricksDevicesComposition",
    "QAmpicillin",
    "QBBNamePlasmid",
    "QBBFunctionTER",
    "QBBNamePromoter",
    "QBBFunctionGameCDS",
    "QBBNameTerminator",
    "QBBFunctionBiologyCDS",
    "QBBNameRBS",
    "QBBExampleCDS",
    "QBBNameCDS",
    "QBBFunctionPR",
    "QBBFunctionRBS",
    "QBBFunctionPlasmid",
    "QBBNameOperator",
    "QDeviceRbsPconsFlhdcTer",
    "QDevicePconsRbsFlhdcTer",
    "QDevicePbadRbsGfpTer",
    "QDevicePbadGfpRbsTer",
    "QDeviceGfpRbsPconsTer",
    "QDevicePconsGfpRbsTer",
    "QDeviceAmprRbsPconsTer",
    "QDeviceRbsPconsAmprTer",
    "QGreenFluorescence",
    "QUnequipDevice",
    "QDevicePbadRbsAraTer",
]
codedData = pd.get_dummies(codedData, prefix=oneHotColumns, columns=oneHotColumns)
codedData.head()

In [ ]:
# Split the forms according to their temporality code
# (0: pretest, 1: posttest, -5: undefined)
beforeForms = codedData[codedData["temporality"] == 0].copy()
afterForms = codedData[codedData["temporality"] == 1].copy()
undefForms = codedData[codedData["temporality"] == -5].copy()
# defForms holds every form, whatever its temporality
defForms = codedData.copy()
# Subjects having both a before and an after form get the two joined on one row
beforeAndAfterForms = beforeForms.merge(afterForms, on="anonymousID", suffixes=('_before', '_after'))

In [ ]:
# Strip the subject identifier from every table used as feature matrix
allForms = codedData.drop(columns="anonymousID")
beforeForms = beforeForms.drop(columns="anonymousID")
afterForms = afterForms.drop(columns="anonymousID")
undefForms = undefForms.drop(columns="anonymousID")
defForms = defForms.drop(columns="anonymousID")
beforeAndAfterForms = beforeAndAfterForms.drop(columns="anonymousID")

With corrected scientific answers


In [ ]:
# Leave the timestamp and comments features out of the corrected data to be coded
codedCorrectedData = correctedData.drop(columns=['timestamp', "comments"])

In [ ]:
# Fill NaN cells with a default answer per question.
# The defaults are applied through a single DataFrame.fillna call: calling
# fillna(..., inplace=True) on a column selected from the frame is a chained
# in-place assignment, which pandas deprecates and which leaves the frame
# unchanged once Copy-on-Write is enabled.
defaultAnswers = {
    "QCuriosityBiology": "Moderately",
    "QCuriositySyntheticBiology": "Moderately",
    "QCuriosityEngineering": "Moderately",
    "QCuriosityVideoGames": "Moderately",
    "gameInterest": "Moderately",
    "biologyInterest": "Moderately",
    "gameFrequency": "Moderately",
    "synthBioKnowledge": "No",
    "previousPlay": "No / not yet",
    "QVolunteer": "No",
    "QEnjoyed": "No",
    "biologyStudy": "I don't know",
}
codedCorrectedData = codedCorrectedData.fillna(value=defaultAnswers)
# Rows still holding a NaN in any other column are dropped
codedCorrectedData = codedCorrectedData.dropna(how='any')

# Replace by code, one whole column at a time instead of cell by cell.
# Answers are looked up with __getitem__ so that an answer without a code
# still raises a KeyError instead of silently becoming NaN.
answerCodings = {
    "QCuriosityBiology": QCuriosityBiologyCoding,
    "QCuriositySyntheticBiology": QCuriositySyntheticBiologyCoding,
    "QCuriosityEngineering": QCuriosityEngineeringCoding,
    "QCuriosityVideoGames": QCuriosityVideoGamesCoding,
    "gameInterest": gameInterestCoding,
    "biologyStudy": biologyStudyCoding,
    "biologyInterest": biologyInterestCoding,
    "gameFrequency": gameFrequencyCoding,
    "synthBioKnowledge": synthBioKnowledgeCoding,
    "previousPlay": previousPlayCoding,
    "lang": languageCoding,
    "temporality": temporalityCoding,
    "QVolunteer": QVolunteerCoding,
    "QEnjoyed": QEnjoyedCoding,
}
for columnTag, coding in answerCodings.items():
    codedCorrectedData[columnTag] = codedCorrectedData[columnTag].map(coding.__getitem__)

In [ ]:
# One-Hot version: only gender is expanded into indicator columns here
codedCorrectedData = pd.get_dummies(
    codedCorrectedData,
    columns=["gender"],
    prefix=["gender"],
)
codedCorrectedData.head()

In [ ]:
# Split the corrected forms according to their temporality code
# (0: pretest, 1: posttest, -5: undefined)
beforeCorrectedForms = codedCorrectedData[codedCorrectedData["temporality"] == 0].copy()
afterCorrectedForms = codedCorrectedData[codedCorrectedData["temporality"] == 1].copy()
undefCorrectedForms = codedCorrectedData[codedCorrectedData["temporality"] == -5].copy()
# defCorrectedForms holds every form, whatever its temporality
defCorrectedForms = codedCorrectedData.copy()
# Subjects having both a before and an after form get the two joined on one row
beforeAndAfterCorrectedForms = beforeCorrectedForms.merge(
    afterCorrectedForms, on="anonymousID", suffixes=('_before', '_after')
)

In [ ]:
# Strip the subject identifier from every corrected table used as feature matrix
allCorrectedForms = codedCorrectedData.drop(columns="anonymousID")
beforeCorrectedForms = beforeCorrectedForms.drop(columns="anonymousID")
afterCorrectedForms = afterCorrectedForms.drop(columns="anonymousID")
undefCorrectedForms = undefCorrectedForms.drop(columns="anonymousID")
defCorrectedForms = defCorrectedForms.drop(columns="anonymousID")
beforeAndAfterCorrectedForms = beforeAndAfterCorrectedForms.drop(columns="anonymousID")

For Classification


In [ ]:
# Use defForms and defCorrectedForms for coded data

RedMetrics


In [ ]:
# Fetch the RedMetrics data of the subjects who answered the gform, using the
# correct answers and the demographic answers as _source.
allData = getAllUserVectorData(
    getAllResponders(gfdf),
    rmdf,
    gfdf,
    _source=correctAnswers + demographicAnswers,
)

In [ ]:
def prepareAllData(allData):
    """Reshape the user vector data so that it can be used as a feature table.

    The frame is transposed (subjects become rows and features become
    columns), the "switch" and "gotomooc" columns are removed and every
    missing value is replaced by -1.

    Returns the reshaped DataFrame.
    """
    # Columns considered useless because all rows have the same value
    constantColumns = ["switch", "gotomooc"]
    subjectsAsRows = allData.transpose()
    return subjectsAsRows.drop(columns=constantColumns).fillna(-1)

In [ ]:
# Replace the fetched data by its prepared version (transposed, without the
# "switch" and "gotomooc" columns, NaN filled with -1).
# NOTE: allData is overwritten by its own transformation, so this cell is meant
# to run once after the fetch; running it again would transpose the frame again.
allData = prepareAllData(allData)

For clustering


In [ ]:
# Keep the subjects with both a pretest and a posttest score
# (a negative score marks a value that was missing and filled with -1)
fullProcessData = allData[(allData["scorepretest"] >= 0) & (allData["scoreposttest"] >= 0)]

Combined with questionnaire answers


In [ ]:
# Keep the subjects who have answered the after questionnaire
withAfterData = allData[allData["scoreposttest"] >= 0].copy()
# Expose the subject id (the index, without surrounding double quotes) as a
# column so that the questionnaire tables can be joined on it
withAfterData['anonymousID'] = [subjectId.strip('"') for subjectId in withAfterData.index]

# Join with the posttest questionnaires holding the complete answers
afterNotCorrected = codedData[codedData["temporality"] == 1].copy()
gameAndAfterData = withAfterData.merge(afterNotCorrected, on="anonymousID").drop(columns="anonymousID")

# Join with the posttest questionnaires holding the corrected answers
afterCorrected = codedCorrectedData[codedCorrectedData["temporality"] == 1].copy()
gameAndCorrectedAfterData = withAfterData.merge(afterCorrected, on="anonymousID").drop(columns="anonymousID")

For classification


In [ ]:
def floorCheckpoints(value):
    """Cap a checkpoint time: values above 3600 (1h) are replaced by 3600.

    Defined at notebook level (and not inside getAllDataClassif) because the
    cells preparing the questionnaire-joined tables call it as well.
    """
    if value > 3600:
        return 3600
    return value


def getAllDataClassif(allData):
    """Build the table used for classification from the prepared game data.

    Works on a copy of allData, so the argument is left untouched. The copy
    gets an 'anonymousID' column (the index, stripped of surrounding double
    quotes), and its values are capped:
    - columns whose label is a numpy int64: at 3600 (see floorCheckpoints);
    - "completionTime": at 7200;
    - "thoroughness": at 1000.

    Returns the new DataFrame.
    """
    allDataClassif = allData.copy()
    allDataClassif['anonymousID'] = allData.index
    allDataClassif['anonymousID'] = allDataClassif['anonymousID'].apply(lambda x: x.strip('"'))

    # If checkpoint not reached, set time to 3600 (1h)
    # NOTE(review): only labels that are exactly np.int64 are treated as
    # checkpoint columns; Python int or other numpy integer labels are left
    # uncapped - confirm that this matches how the labels are produced.
    for col in allDataClassif.columns.values.tolist():
        if isinstance(col, np.int64):
            allDataClassif[col] = allDataClassif[col].apply(floorCheckpoints)
    allDataClassif["completionTime"] = allDataClassif["completionTime"].apply(lambda x: min(x, 7200))

    # floor thoroughness
    allDataClassif["thoroughness"] = allDataClassif["thoroughness"].apply(lambda x: min(x, 1000))
    return allDataClassif

# Classification table built from the prepared game data (allData itself is not modified)
allDataClassif = getAllDataClassif(allData)

Combined with questionnaire answers


In [ ]:
# Join the game data of the subjects who answered the after questionnaire with
# their corrected questionnaire answers
gameAndCorrectedAfterDataClassif = pd.merge(withAfterData, afterCorrected, on="anonymousID")
gameAndCorrectedAfterDataClassif['anonymousID'] = gameAndCorrectedAfterDataClassif['anonymousID'].apply(lambda x: x.strip('"'))
gameAndCorrectedAfterDataClassif = gameAndCorrectedAfterDataClassif.drop(columns=["scorepretest", "temporality"])

# If checkpoint not reached, set time to 3600 (1h).
# The cap is applied inline so that this cell does not rely on a helper being
# available in the notebook namespace.
for col in gameAndCorrectedAfterDataClassif.columns.values.tolist():
    if isinstance(col, np.int64):
        gameAndCorrectedAfterDataClassif[col] = gameAndCorrectedAfterDataClassif[col].apply(lambda value: min(value, 3600))
gameAndCorrectedAfterDataClassif["completionTime"] = gameAndCorrectedAfterDataClassif["completionTime"].apply(lambda x: min(x, 7200))

# floor thoroughness
gameAndCorrectedAfterDataClassif["thoroughness"] = gameAndCorrectedAfterDataClassif["thoroughness"].apply(lambda x: min(x, 1000))

In [ ]:
#Get the subset of subjects who have answered the before questionnaire
withBeforeData = allData[allData["scorepretest"] >= 0].copy()
withBeforeData['anonymousID'] = withBeforeData.index
withBeforeData['anonymousID'] = withBeforeData['anonymousID'].apply(lambda x: x.strip('"'))
beforeCorrected = codedCorrectedData[codedCorrectedData["temporality"] == 0].copy()
#Join with questionnaire data with corrected answers
gameAndCorrectedBeforeDataClassif = pd.merge(withBeforeData, beforeCorrected, on="anonymousID")
gameAndCorrectedBeforeDataClassif['anonymousID'] = gameAndCorrectedBeforeDataClassif['anonymousID'].apply(lambda x: x.strip('"'))
gameAndCorrectedBeforeDataClassif = gameAndCorrectedBeforeDataClassif.drop(columns=["scoreposttest", "temporality"])

# If checkpoint not reached, set time to 3600 (1h).
# The cap is applied inline so that this cell does not rely on a helper being
# available in the notebook namespace.
for col in gameAndCorrectedBeforeDataClassif.columns.values.tolist():
    if isinstance(col, np.int64):
        gameAndCorrectedBeforeDataClassif[col] = gameAndCorrectedBeforeDataClassif[col].apply(lambda value: min(value, 3600))
gameAndCorrectedBeforeDataClassif["completionTime"] = gameAndCorrectedBeforeDataClassif["completionTime"].apply(lambda x: min(x, 7200))

# floor thoroughness
gameAndCorrectedBeforeDataClassif["thoroughness"] = gameAndCorrectedBeforeDataClassif["thoroughness"].apply(lambda x: min(x, 1000))

In [ ]:


In [ ]:


In [ ]: